rm(list = ls())
coeftest.cluster <- function(data, fm, cluster1=NULL, cluster2=NULL, ret="test") {
  
  library(sandwich)
  library(lmtest)
  
  data <- as.data.frame(data)
  
  # variable is provided
  if (is.null(cluster1)) {
    if (ret=="cov") {
      return(vcovHC(fm, type = "HC0"))
    } else {
      return(coeftest(fm, vcov = vcovHC(fm, type = "HC0")))
    }
  }
  
  # Calculation shared by covariance estimates
  est.fun <- estfun(fm)
  
  # Need to identify observations used in the regression (i.e.,
  # non-missing) values, as the cluster vectors come from the full
  # data set and may not be in the regression model.
  inc.obs <- !is.na(est.fun[,1])
  est.fun <- est.fun[inc.obs,]
  
  # Shared data for degrees-of-freedom corrections
  N  <- dim(fm$model)[1]
  NROW <- NROW(est.fun)
  K  <- fm$rank
  
  # Calculate the sandwich covariance estimate
  cov <- function(cluster) {
    cluster <- factor(cluster, exclude=NULL)
    
    # Calculate the "meat" of the sandwich estimators
    u <- apply(est.fun, 2, function(x) tapply(x, cluster, sum))
    meat <- crossprod(u)/N
    
    # Calculations for degrees-of-freedom corrections, followed
    # by calculation of the variance-covariance estimate.
    # NOTE: NROW/N is a kluge to address the fact that sandwich
    # uses the wrong number of rows (includes rows omitted from
    # the regression).
    M <- length(levels(cluster))
    dfc <- M/(M-1) * (N-1)/(N-K)
    
    #print (sandwich(fm, meat=meat))
    return(dfc * NROW/N * sandwich(fm, meat=meat))
  }
  
  # Calculate the covariance matrix estimate for the first cluster.
  cluster1 <- data[inc.obs, cluster1]
  cov1  <- cov(cluster1)
  # print(cov1)
  
  if (is.null(cluster2)) {
    # If only one cluster supplied, return single cluster
    # results
    if (ret=="cov") {
      return(cov1)
    } else {
      return(coeftest(fm, cov1))
    }
  } else {
    # Otherwise do the calculations for the second cluster
    # and the "intersection" cluster.
    cluster2 <- data[inc.obs, cluster2]
    cluster12 <- paste(cluster1, cluster2, sep="")
    
    # Calculate the covariance matrices for cluster2, the "intersection"
    # cluster, then then put all the pieces together.
    cov2   <- cov(cluster2)
    cov12  <- cov(cluster12)
    covMCL <- (cov1 + cov2 - cov12)
    
    # Return the output of coeftest using two-way cluster-robust
    # standard errors.
    # print(ret)
    if (ret=="cov") {
      return(covMCL)
    } else {
      return(coeftest(fm, covMCL))
    }
  }
}

load("dataBJPOLS.RData")

# --------------------------------------------------
# First stage - full sample
# --------------------------------------------------
inst.1 <- "judgeiv_hd"
endo.1 <- "pti"
outc.1 <- "vote_post"

time.controls <- "as.factor(court_time1) + as.factor(court_time2) + as.factor(court_dow) + as.factor(severity)"
demo.controls <- "age + I(age^2) +  as.factor(race4) + female + vote_pre + as.factor(noteli) + regis_before"
case.controls <- "as.factor(any_drug) +  as.factor(any_weapon) +  as.factor(any_prop) + as.factor(any_prior_case)"

# --------------------------------------------
# "Randomization test" - full sample
# --------------------------------------------
## Column 1
ss3a <- ivreg(pti ~  age + I(age^2) + as.factor(female) + as.factor(race4)
              + as.factor(any_drug) + as.factor(any_prop) +  as.factor(any_weapon) 
              + as.factor(court_time1) + as.factor(court_time2)  + as.factor(severity) + as.factor(court_dow) 
              + as.factor(any_prior_case)
              + vote_pre + as.factor(noteli) + regis_before
              , data = last.cases)

ss3a0 <- ivreg(pti ~ as.factor(court_time1) + as.factor(court_time2)  + as.factor(severity) + as.factor(court_dow), 
               data = last.cases)

## Column 2
ss3b <- ivreg(judgeiv_hd ~ 
                age + I(age^2) + as.factor(female) + as.factor(race4)
              + as.factor(any_drug) + as.factor(any_prop) +  as.factor(any_weapon) 
              + as.factor(court_time1) + as.factor(court_time2) + as.factor(severity) + as.factor(court_dow) 
              + as.factor(any_prior_case)
              + vote_pre + as.factor(noteli) + regis_before
              , data = last.cases)
ss3b0 <- ivreg(judgeiv_hd ~ + as.factor(court_time1) + as.factor(court_time2)  + as.factor(severity) + as.factor(court_dow)
               , data = last.cases)
anova(ss3b, ss3b0)

## Coefficients
t1 <- cbind(round(coeftest.cluster(last.cases, ss3a, cluster1 = "judge_cat"),5)[,1], 
            round(coeftest.cluster(last.cases, ss3b, cluster1 = "judge_cat"),5)[,1])


## Std Errors
results.1 <- results.2 <- list()
for(i in 1:500) {
  print(i)
  out <- last.cases[, this_id_fa[sample.int(.N, .N, TRUE)], by = c("judge_cat", "court_time1")]
  last.cases.boot <- last.cases[last.cases$this_id_fa %in% out$V1, ]

  ss3aB <- ivreg(pti ~  age + I(age^2) + as.factor(female) + as.factor(race4)
                + as.factor(any_drug) + as.factor(any_prop) +  as.factor(any_weapon) 
                + as.factor(court_time1) + as.factor(court_time2) + as.factor(severity) + as.factor(court_dow) 
                + as.factor(any_prior_case)
                + vote_pre + as.factor(noteli) + regis_before
                , data = last.cases.boot)

  ss3bB <- ivreg(judgeiv_hd ~ 
                  age + I(age^2) + as.factor(female) + as.factor(race4)
                + as.factor(any_drug) + as.factor(any_prop) +  as.factor(any_weapon) 
                + as.factor(court_time1) + as.factor(court_time2) + as.factor(severity) + as.factor(court_dow) 
                + as.factor(any_prior_case)
                + vote_pre + as.factor(noteli) + regis_before
                , data = last.cases.boot)
  
  results.1[[i]] <- as.matrix(coefficients(ss3aB))
  results.2[[i]] <- as.matrix(coefficients(ss3bB))
}

col1sd <- as.matrix(apply(do.call('cbind', results.1), 1, sd, na.rm =T))
col2sd <- as.matrix(apply(do.call('cbind', results.2), 1, sd, na.rm =T))

t2 <- cbind(col1sd, col2sd)

## Variables to report in the table
keep <- c("age",
          "I(age^2)",
          "as.factor(female)1",
          "as.factor(race4)H",
          "as.factor(race4)W",
          "as.factor(any_drug)1",
          "as.factor(any_prop)1",
          "as.factor(any_weapon)1",
          "as.factor(any_prior_case)1",
          "vote_pre",
          "as.factor(noteli)1",
          "regis_before"
)
## Coeffs
coefs <- as.data.frame(t1[rownames(t1) %in% keep, ])
names(coefs) <- c("Pretrial Incarceration Est.", "Magistrate Leniency Est.")
print(coefs)

## Std Errors
se <- as.data.frame(t2[rownames(t2) %in% keep, ])
names(se) <- c("Pretrial Incarceration SE", "Magistrate Leniency SE")
cat("\nPrinting Table H2 standard errors...\n")
print(se)

## Joint F-test
f1 <- print(round((anova(ss3a, ss3a0)$F)[2],2))
f2 <- print(round((anova(ss3b, ss3b0)$F)[2], 3))
f <- c(f1, 0, f2, 0)

table <- cbind(coefs[,1], se[,1], coefs[,2], se[,2])
table <- cbind(coefs, se)
table <- table[,c(1, 3, 2,4)]

table <- rbind(table, f)
var.labels <- c("Age", "Age^2", "Female", "Hispanic", "White", "Any Drug Charge",
                "Any Property Charge", "Any Weapon Charge", "Any Prior Case", "Prior Voter", 
                "Voting Age Ineligible in Prior Election", "Registered Pretreatment")
rownames(table) <- c(var.labels, "Joint F. Stat.")
table


